This walkthrough describes the population genetics analysis of SNPs generated from Pseudodiplora strigosa samples collected across urbanized and reef habitats in southeast Florida. Sequence alignments are based on an unpublished Pseudodiploria strigosa genome assembly by the Aquatic Symbiosis Genomics Project: https://www.aquaticsymbiosisgenomics.org/. For initial processing of 2bRAD reads regardless of species, and the species-specific 2bRAD pipeline, please see below:
Library prep, bioinformatics, and analysis protocols are credited to the 2bRAD pipeline originally developed by Misha Matz: https://doi.org/10.1038/nmeth.2023, and further refined by Ryan Eckert: https://ryaneckert.github.io/Stephanocoenia_FKNMS_PopGen/code/
For the following analyses we will require the use of a number of different R packages. We can use the following code to quickly load in the packages and install any packages not previously installed in the R console.
if (!require("pacman")) install.packages("pacman")
pacman::p_load_gh("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis", "ropensci/rnaturalearthhires", "KarstensLab/microshades")
pacman::p_load("cowplot", "car", "ggrepel", "ggspatial", "paletteer", "patchwork", "rgdal", "rnaturalearth", "sf", "Hmisc", "MCMC.OTU", "pairwiseAdonis", "RColorBrewer", "Redmonder", "flextable", "lubridate", "officer", "adegenet", "dendextend", "gdata", "ggdendro", "hierfstat", "kableExtra", "poppr", "reshape2", "StAMPP", "vcfR", "vegan", "boa", "magick", "rgeos", "sdmpredictors", "ggcorrplot", "tidyverse", "TeachingDemos", "LaplacesDemon", "adespatial", "ggnewscale", "ggbeeswarm", "multcomp", "rstatix", "R.utils", "graph4lg")
options("scipen" = 10)
Making color palettes to use throughout all plots
Analyzing 2bRAD generated SNPs (20,323 loci) for population structure/genetic connectivity across urbanized and reef sites in southeast Florida
Identification of any natural clones using technical replicates as a baseline for clonality between samples
pacman::p_load("dendextend", "ggdendro", "tidyverse")
cloneBams = read.csv("../../data/pstr/pstrMetadata.csv") # list of bam files
cloneMa = as.matrix(read.table("../../data/pstr/ANGSD/clones/pstrClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(cloneMa) = list(cloneBams[,2],cloneBams[,2])
clonesHc = hclust(as.dist(cloneMa),"ave")
clonePops = cloneBams$region
cloneSite = cloneBams$site
cloneDend = cloneMa %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
cloneDData = cloneDend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
cloneDData$segments$yend2 = cloneDData$segments$yend
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend2[i] == 0) {
cloneDData$segments$yend2[i] = (cloneDData$segments$y[i] - 0.01)}}
cloneDendPoints = cloneDData$labels
cloneDendPoints$region = clonePops[order.dendrogram(cloneDend)]
cloneDendPoints$site=cloneSite[order.dendrogram(cloneDend)]
rownames(cloneDendPoints) = cloneDendPoints$label
cloneDendPoints$region = as.factor(cloneDendPoints$region)
cloneDendPoints$site = as.factor(cloneDendPoints$site)
# Making points at the leaves to place symbols for regions
point = as.vector(NA)
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend[i] == 0) {
point[i] = cloneDData$segments$y[i] - 0.01
} else {
point[i] = NA}}
cloneDendPoints$y = point[!is.na(point)]
techReps = c("urban_029", "urban_029_2", "urban_029_3", "urban_063", "urban_063_2", "urban_063_3", "urban_090", "urban_090_2", "urban_090_3", "urban_216", "urban_216_2", "urban_216_3")
cloneDendPoints$site = factor(cloneDendPoints$site,levels(cloneDendPoints$site)[c(5, 4, 16, 17, 7, 3, 13, 19, 12, 6, 2, 18, 15, 11, 8, 10, 9, 1, 20, 14)])
cloneDendPoints$region = factor(cloneDendPoints$region,levels(cloneDendPoints$region)[c(2, 3, 4, 5, 1, 6)])
Pal <- c("#E69F00", "#56B4E9", "#009E73", "#0072B2", "#D55E00", "#CC79A7", "#999999", "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#A65628", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#B3B3B3")
cloneDendA = ggplot() +
geom_segment(data = segment(cloneDData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = cloneDendPoints, aes(x = x, y = y, fill = site, shape = region), size = 4, stroke = 0.25) +
# scale_fill_brewer(palette = "Dark2", name = "Site") +
scale_fill_manual(values = Pal, name= "Site")+
scale_shape_manual(values = c(21, 22, 23, 24, 25, 8), name = "Region")+
geom_hline(yintercept = 0.14, color = "red", lty = 5, size = 0.75) + # creating a dashed line to indicate a clonal distance threshold
geom_text(data = subset(cloneDendPoints, subset = label %in% techReps), aes(x = x, y = (y - .045), label = label), angle = 90) + # spacing technical replicates further from leaf
geom_text(data = subset(cloneDendPoints, subset = !label %in% techReps), aes(x = x, y = (y - .025), label = label), angle = 90) +
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
theme_classic()
cloneDend = cloneDendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "bottom")
cloneDend
ggsave("../../figures/pstr/cloneDend.pdf", plot = cloneDend, height = 8, width = 24, units = "in", dpi = 300)
ggsave("../../figures/pstr/cloneDend.png", plot = cloneDend, height = 8, width = 24, units = "in", dpi = 300)
We removed the technical replicates/clones and re-ran ANGSD for all the following analyses
pacman::p_load("dendextend", "ggdendro", "tidyverse")
bams = read.csv("../../data/pstr/pstrMetadata.csv")[-c(5, 7, 16, 24, 28, 31, 34, 38, 45, 46, 50, 51, 58, 61, 62, 63, 64, 65, 69, 70, 72, 73, 74, 75, 78, 79, 83, 89, 90, 106, 113, 139),] # list of bams files and their populations (tech reps removed)
ma = as.matrix(read.table("../../data/pstr/ANGSD/pstrNoClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(ma) = list(bams[,2],bams[,2])
Hc = hclust(as.dist(ma),"ave")
Pops = bams$region
Site = bams$site
Dend = ma %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
DData = Dend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
DData$segments$yend2 = DData$segments$yend
for(i in 1:nrow(DData$segments)) {
if (DData$segments$yend2[i] == 0) {
DData$segments$yend2[i] = (DData$segments$y[i] - 0.01)}}
DendPoints = DData$labels
DendPoints$region = Pops[order.dendrogram(Dend)]
DendPoints$site=Site[order.dendrogram(Dend)]
rownames(DendPoints) = DendPoints$label
DendPoints$region = as.factor(DendPoints$region)
DendPoints$site = as.factor(DendPoints$site)
# Making points at the leaves to place symbols for regions
point = as.vector(NA)
for(i in 1:nrow(DData$segments)) {
if (DData$segments$yend[i] == 0) {
point[i] = DData$segments$y[i] - 0.01
} else {
point[i] = NA}}
DendPoints$y = point[!is.na(point)]
DendPoints$site = factor(DendPoints$site,levels(DendPoints$site)[c(5, 4, 16, 17, 7, 3, 13, 19, 12, 6, 2, 18, 15, 11, 8, 10, 9, 1, 20, 14)])
DendPoints$region = factor(DendPoints$region,levels(DendPoints$region)[c(2, 3, 4, 5, 1, 6)])
Pal <- c("#E69F00", "#56B4E9", "#009E73", "#0072B2", "#D55E00", "#CC79A7", "#999999", "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#A65628", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#B3B3B3")
DendA = ggplot() +
geom_segment(data = segment(DData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = DendPoints, aes(x = x, y = y, fill = site, shape = region), size = 4, stroke = 0.25) +
# scale_fill_brewer(palette = "Dark2", name = "Site") +
scale_fill_manual(values = Pal, name= "Site")+
scale_shape_manual(values = c(21, 22, 23, 24, 25, 8), name = "Region")+
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
theme_classic()
Dend = DendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "bottom")
Dend
ggsave("../../figures/pstr/Dend.pdf", plot = Dend, height = 8, width = 24, units = "in", dpi = 300)
ggsave("../../figures/pstr/Dend.png", plot = Dend, height = 8, width = 24, units = "in", dpi = 300)